package com.fengxing.ams.web.plugin.jsoup;

import org.jsoup.helper.Validate;
import org.jsoup.nodes.Attribute;
import org.jsoup.nodes.Attributes;
import org.jsoup.nodes.DataNode;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.parser.Tag;
import org.jsoup.select.NodeTraversor;
import org.jsoup.select.NodeVisitor;

import com.fengxing.ams.pub.util.StringUtil;


/**
 The whitelist based HTML cleaner. Use to ensure that end-user provided HTML contains only the elements and attributes
 that you are expecting; no junk, and no cross-site scripting attacks!
 <p>
 The HTML cleaner parses the input as HTML and then runs it through a white-list, so the output HTML can only contain
 HTML that is allowed by the whitelist.
 </p>
 <p>
 It is assumed that the input HTML is a body fragment; the clean methods only pull from the source's body.
 </p>
 <p>
 On top of the plain white-list filtering, this cleaner applies the following rules while copying:
 </p>
 <ul>
 <li>a whitelisted {@code p} that has no child nodes, or whose only child is a text node that is empty once whitespace
 and non-breaking spaces are removed, is dropped and counted as discarded;</li>
 <li>a whitelisted {@code a} without a non-empty {@code name} attribute is dropped and counted as discarded; its
 children are still visited and copied into the enclosing destination element;</li>
 <li>every copied text node has all whitespace and U+00A0 characters removed; text nodes that end up empty are
 omitted;</li>
 <li>the first {@code tr} of the nearest enclosing {@code table} is emitted as {@code thead}, and the {@code td}
 cells of that row are emitted as {@code th};</li>
 <li>a {@code style} attribute on a {@code p} outside any {@code table} that the whitelist rejects but that contains
 a {@code text-indent:} declaration is kept, reduced to that declaration.</li>
 </ul>
 */
public class Cleaner {
    private final Whitelist whitelist;

    /**
     Create a new cleaner, that sanitizes documents using the supplied whitelist.
     @param whitelist white-list to clean with; must not be null
     */
    public Cleaner(Whitelist whitelist) {
        Validate.notNull(whitelist);
        this.whitelist = whitelist;
    }

    /**
     Creates a new, clean document, from the original dirty document, containing only elements allowed by the whitelist.
     The original document is not modified. Only elements from the dirty document's <code>body</code> are used.
     @param dirtyDocument Untrusted base document to clean.
     @return cleaned document.
     */
    public Document clean(Document dirtyDocument) {
        Validate.notNull(dirtyDocument);

        Document clean = Document.createShell(dirtyDocument.baseUri());
        Element dirtyBody = dirtyDocument.body();
        if (dirtyBody != null) // frameset documents won't have a body. the clean doc will have empty body.
            copySafeNodes(dirtyBody, clean.body());

        return clean;
    }

    /**
     Determines if the input document is valid, against the whitelist. It is considered valid if nothing had to be
     discarded while copying its body: no tag, attribute, comment or similar node, and no element dropped by the
     additional rules described on this class.
     <p>
     This method can be used as a validator for user input forms. An invalid document will still be cleaned successfully
     using the {@link #clean(Document)} method. If using as a validator, it is recommended to still clean the document
     to ensure enforced attributes are set correctly, and that the output is tidied.
     </p>
     @param dirtyDocument document to test
     @return true if nothing needs to be removed (or the document has no body); false otherwise
     */
    public boolean isValid(Document dirtyDocument) {
        Validate.notNull(dirtyDocument);

        Element dirtyBody = dirtyDocument.body();
        if (dirtyBody == null) // same guard as clean(): without a body nothing is traversed, so nothing is discarded
            return true;

        Document clean = Document.createShell(dirtyDocument.baseUri());
        return copySafeNodes(dirtyBody, clean.body()) == 0;
    }

    /**
     Iterates the input and copies trusted nodes (tags, attributes, text) into the destination.
     */
    private final class CleaningVisitor implements NodeVisitor {
        private int numDiscarded = 0;
        private final Element root;
        private Element destination; // current element to append nodes to

        private CleaningVisitor(Element root, Element destination) {
            this.root = root;
            this.destination = destination;
        }

        /**
         Called when a node is first reached: copies it into the current destination if it is allowed, otherwise
         counts it as discarded.
         */
        public void head(Node source, int depth) {
            if (source instanceof Element) {
                Element sourceEl = (Element) source;

                if (whitelist.isSafeTag(sourceEl.tagName())) {
                    if (isDroppedElement(sourceEl)) {
                        // whitelisted, but removed by the empty-paragraph / unnamed-anchor rules
                        numDiscarded++;
                    } else { // safe, clone and copy safe attrs
                        ElementMeta meta = createSafeElement(sourceEl);
                        Element destChild = meta.el;
                        destination.appendChild(destChild);

                        numDiscarded += meta.numAttribsDiscarded;
                        destination = destChild; // descend; tail() pops this again
                    }
                } else if (source != root) { // not a safe tag, so don't add. don't count root against discarded.
                    numDiscarded++;
                }
            } else if (source instanceof TextNode) {
                String text = stripWhitespace(((TextNode) source).getWholeText());
                if (text.length() != 0) {
                    destination.appendChild(new TextNode(text, source.baseUri()));
                }
            } else if (source instanceof DataNode && whitelist.isSafeTag(source.parent().nodeName())) {
                DataNode sourceData = (DataNode) source;
                destination.appendChild(new DataNode(sourceData.getWholeData(), source.baseUri()));
            } else { // else, we don't care about comments, xml proc instructions, etc
                numDiscarded++;
            }
        }

        /**
         Called when a node and all its children have been visited: undoes the descent made in
         {@link #head(Node, int)}. The condition must mirror the one under which head() descended.
         */
        public void tail(Node source, int depth) {
            if (source instanceof Element && whitelist.isSafeTag(source.nodeName())) {
                if (!isDroppedElement((Element) source))
                    destination = destination.parent(); // would have descended, so pop destination stack
            }
        }
    }

    /**
     Copies the allowed content of {@code source} into {@code dest}.
     @return the number of nodes and attributes that were discarded
     */
    private int copySafeNodes(Element source, Element dest) {
        CleaningVisitor cleaningVisitor = new CleaningVisitor(source, dest);
        NodeTraversor traversor = new NodeTraversor(cleaningVisitor);
        traversor.traverse(source);
        return cleaningVisitor.numDiscarded;
    }

    /**
     Builds the clean counterpart of a whitelisted source element: same (or header-promoted) tag, only the allowed
     attributes, plus the attributes the whitelist enforces for that tag. The source element and its attributes are
     left untouched.
     @return the new element together with the number of attributes that were dropped
     */
    private ElementMeta createSafeElement(Element sourceEl) {
        String sourceTag = headerAwareTagName(sourceEl);

        Attributes destAttrs = new Attributes();
        Element dest = new Element(Tag.valueOf(sourceTag), sourceEl.baseUri(), destAttrs);
        int numDiscarded = 0;

        for (Attribute sourceAttr : sourceEl.attributes()) {
            if (whitelist.isSafeAttribute(sourceTag, sourceEl, sourceAttr)) {
                destAttrs.put(sourceAttr);
            } else if (isParagraphIndentStyle(sourceEl, sourceAttr)) {
                // Keep only the text-indent declaration. A new Attribute is created so that the dirty document's
                // own style attribute is not rewritten as a side effect of cleaning.
                // NOTE(review): the retained value comes from untrusted input and is only delimited by
                // StringUtil.subString - confirm that helper cannot let unwanted CSS through.
                String lowerStyle = sourceAttr.getValue().toLowerCase(java.util.Locale.ENGLISH);
                String indent = StringUtil.subString(lowerStyle, "text-indent:", ";");
                destAttrs.put(new Attribute(sourceAttr.getKey(), "text-indent:" + indent));
            } else {
                numDiscarded++;
            }
        }
        Attributes enforcedAttrs = whitelist.getEnforcedAttributes(sourceTag);
        destAttrs.addAll(enforcedAttrs);

        return new ElementMeta(dest, numDiscarded);
    }

    /**
     Returns the tag name to emit for the element: {@code thead} for the first {@code tr} of its nearest enclosing
     table, {@code th} for a {@code td} inside that first row, and the element's own tag name in every other case
     (including rows or cells that have no enclosing table or row).
     */
    private static String headerAwareTagName(Element sourceEl) {
        String tag = sourceEl.tagName();
        boolean isRow = tag.equalsIgnoreCase("tr");
        boolean isCell = tag.equalsIgnoreCase("td");
        if (!isRow && !isCell)
            return tag;

        Element row = isRow ? sourceEl : closestAncestor(sourceEl, "tr");
        Element table = closestAncestor(sourceEl, "table");
        if (row == null || table == null)
            return tag; // detached row/cell: nothing to compare against, keep the tag as it is

        Element firstRow = table.select("tr").first(); // null when the table holds no tr at all
        // identity, not equals(): the question is whether this is that very node, not a look-alike row
        if (row != firstRow)
            return tag;

        return isRow ? "thead" : "th";
    }

    /**
     Tells whether a whitelist-rejected attribute is a {@code style} on a {@code p} outside any table that carries a
     {@code text-indent:} declaration.
     */
    private boolean isParagraphIndentStyle(Element sourceEl, Attribute sourceAttr) {
        return sourceEl.tagName().equalsIgnoreCase("p")
                && sourceAttr.getKey().equalsIgnoreCase("style")
                // fixed locale so the match does not depend on the JVM default (e.g. Turkish dotless i)
                && sourceAttr.getValue().toLowerCase(java.util.Locale.ENGLISH).contains("text-indent:")
                && !isTableChild(sourceEl);
    }

    /**
     Tells whether the element has a {@code table} element among its ancestors.
     */
    private boolean isTableChild(Element sourceEl) {
        return closestAncestor(sourceEl, "table") != null;
    }

    /**
     Walks up from the element's parent and returns the first ancestor with the given tag name (compared ignoring
     case), or null if there is none.
     */
    private static Element closestAncestor(Element el, String tagName) {
        Node node = el.parent();
        while (node != null) {
            if (node instanceof Element && ((Element) node).tagName().equalsIgnoreCase(tagName))
                return (Element) node;
            node = node.parent();
        }
        return null;
    }

    /**
     Tells whether a whitelisted element is nevertheless left out of the output: an empty {@code p}, or an
     {@code a} without a non-empty {@code name} attribute. Used by both head() and tail() so the two always agree.
     */
    private static boolean isDroppedElement(Element el) {
        String tag = el.tagName();
        if (tag.equals("p"))
            return isEmptyParagraph(el);
        if (tag.equals("a")) {
            String name = el.attr("name");
            return name == null || name.length() == 0;
        }
        return false;
    }

    /**
     A paragraph counts as empty when it has no child nodes, or a single text child that is blank after
     {@link #stripWhitespace(String)}.
     */
    private static boolean isEmptyParagraph(Element paragraph) {
        int childCount = paragraph.childNodeSize();
        if (childCount == 0)
            return true;
        if (childCount != 1)
            return false;

        Node onlyChild = paragraph.childNode(0);
        return onlyChild instanceof TextNode
                && stripWhitespace(((TextNode) onlyChild).getWholeText()).length() == 0;
    }

    /**
     Removes every whitespace character and every non-breaking space (U+00A0) from the text.
     NOTE(review): this also removes the spaces between words; presumably intended for content that does not use
     inter-word spacing - confirm before reusing for other input.
     */
    private static String stripWhitespace(String text) {
        return text.replaceAll("\\s*", "").replaceAll("\u00A0", "");
    }

    /**
     Pairs a freshly created clean element with the number of source attributes that were not copied to it.
     */
    private static class ElementMeta {
        final Element el;
        final int numAttribsDiscarded;

        ElementMeta(Element el, int numAttribsDiscarded) {
            this.el = el;
            this.numAttribsDiscarded = numAttribsDiscarded;
        }
    }

}
